<?php

include("utilities.php");
include("phpHTMLParser.php");

// Script entry point. The three calls run in this fixed order:
//   1. printxmlRequest('crawl')  - not defined in this file (presumably in
//      utilities.php); looks like it writes the request part of the reply.
//   2. parseXMLStr()             - reads the request parameters and runs the
//      crawl, appending its output to the global $xmlResponse.
//   3. printxmlResponse('crawl') - not defined in this file (presumably in
//      utilities.php); looks like it writes out what was accumulated.
printxmlRequest('crawl');
parseXMLStr();
printxmlResponse('crawl');
	// Debug aid, intentionally disabled:
	//phpinfo();


/**
 * Read the crawl parameters from the request and crawl the start page.
 *
 * Request parameters (both optional):
 *   - url: page to start crawling from; defaults to "http://www.iit.edu".
 *   - tag: base tag name; defaults to "home". A leading "/" is added when
 *          the value does not already start with one.
 *
 * Everything produced by the crawl is appended to the global $xmlResponse,
 * wrapped in a <list> element.
 *
 * @return void
 */
function parseXMLStr()
{
	global $xmlResponse;

	// A missing parameter, or a non-string one (e.g. "url[]=x"), is treated
	// like an empty value so that the defaults below apply. Reading the keys
	// unconditionally would raise an undefined-index notice/warning that can
	// end up inside the XML output.
	$site = "";
	if (isset($_REQUEST['url']) && is_string($_REQUEST['url'])) {
		$site = $_REQUEST['url'];
	}

	$tagName = "";
	if (isset($_REQUEST['tag']) && is_string($_REQUEST['tag'])) {
		$tagName = $_REQUEST['tag'];
	}

	if ($site === "") {
		$site = "http://www.iit.edu";
	}
	if ($tagName === "") {
		$tagName = "home";
	}

	// Tag names are always handled in their "/name" form.
	if (strncmp($tagName, "/", 1) != 0) {
		$tagName = "/" . $tagName;
	}

	$xmlResponse .= "<list>";
	// Starting at level 1 means crawlIt() processes the links of the start
	// page but does not descend into the linked pages (it recurses only
	// while $level < 1).
	crawlIt($site, $tagName, 1);
	$xmlResponse .= "</list>";
}

/**
 * Fetch one page, report it, and process every link found on it.
 *
 * For each <a> tag with a non-empty href the link is resolved with
 * getFullSite() (defined outside this file; presumably makes the href
 * absolute relative to $site). http/https links are handed to addTags() and,
 * while $level < 1, crawled recursively; every other link is reported as
 * <noncrawlsite>.
 *
 * All output is appended to the global $xmlResponse. URLs are XML-escaped
 * before being written, because query strings routinely contain "&", which
 * would otherwise make the response malformed.
 *
 * @param string $site    URL of the page to fetch.
 * @param string $tagName Base tag name, passed through to addTags().
 * @param int    $level   Current depth; linked pages are crawled only while
 *                        $level < 1.
 * @return void
 */
function crawlIt($site, $tagName, $level)
{
	global $xmlResponse;

	// $site can come straight from the request. Apply the same http/https
	// rule that is used for links, so file_get_contents() is never pointed
	// at a local path or another stream wrapper (file://, php://, ...).
	if (!isSite($site)) {
		$xmlResponse .= "<noncrawlsite>";
		$xmlResponse .= htmlspecialchars($site, ENT_QUOTES, 'UTF-8');
		$xmlResponse .= "</noncrawlsite>";
		return;
	}

	$content = file_get_contents($site);

	$xmlResponse .= "<crawlsite>";
	$xmlResponse .= htmlspecialchars($site, ENT_QUOTES, 'UTF-8');
	$xmlResponse .= "</crawlsite>";

	// file_get_contents() returns false when the page cannot be fetched;
	// there is nothing to parse in that case.
	if ($content === false) {
		return;
	}

	$parser     = new phpHTMLParser($content);
	$HTMLObject = $parser->parse_tags(array("a"));
	$aTags      = $HTMLObject->getTagsByName("a");

	// Loose comparison on purpose: covers NULL as well as an empty result.
	if ($aTags == NULL) {
		return;
	}

	foreach ($aTags as $a) {
		if ($a->href == "") {
			continue;
		}

		$siteName = getFullSite($site, $a->href);

		if (isSite($siteName)) {
			addTags($siteName, $tagName);
			if ($level < 1) {
				crawlIt($siteName, $tagName, $level + 1);
			}
		} else {
			$xmlResponse .= "<noncrawlsite>";
			$xmlResponse .= htmlspecialchars($siteName, ENT_QUOTES, 'UTF-8');
			$xmlResponse .= "</noncrawlsite>";
		}
	}
}

/**
 * Tag a page with up to three of its meta keywords via the external
 * "tag add" command.
 *
 * The keywords are taken from the page's <meta name="keywords"> content.
 * A keyword is used only if, after trimming, it is non-empty, is not "IIT"
 * and is shorter than 12 bytes. For every accepted keyword two arguments are
 * added to the command: the keyword itself and "$tagName/keyword".
 *
 * Appends <link>, <numKeyWords> and, when the command was run, <command>
 * to the global $xmlResponse.
 *
 * @param string $siteName Absolute URL of the page to tag.
 * @param string $tagName  Base tag name (caller supplies it in "/name" form).
 * @return void
 */
function addTags($siteName, $tagName)
{
	global $xmlResponse;

	// get_meta_tags() returns false on failure, and a page need not declare
	// any keywords; both cases simply yield no keywords.
	$metaInfo = get_meta_tags($siteName);
	$keyWords = array();
	if (is_array($metaInfo) && isset($metaInfo['keywords'])) {
		$keyWords = csv2array($metaInfo['keywords']);
	}

	$xmlResponse .= "<link>" . htmlspecialchars($siteName, ENT_QUOTES, 'UTF-8') . "</link>";

	// Every argument is passed through escapeshellarg(): the URL and the
	// keywords come from remote pages and the tag name from the request, so
	// plain double quotes would let `$(...)`, backticks or an embedded quote
	// run arbitrary shell commands.
	$command = "tag add " . escapeshellarg($siteName) . " ";

	$numKeyWords = 0;
	foreach ($keyWords as $keyWord) {
		// At most three keywords per page.
		if ($numKeyWords == 3) {
			break;
		}

		$keyWord = trim($keyWord);
		if ($keyWord !== "" && $keyWord !== "IIT" && strlen($keyWord) < 12) {
			$command .= escapeshellarg($keyWord) . " ";
			$command .= escapeshellarg($tagName . "/" . $keyWord) . " ";
			$numKeyWords += 1;
		}
	}

	$xmlResponse .= "<numKeyWords>" . $numKeyWords . "</numKeyWords>";
	if ($numKeyWords != 0) {
		exec($command);
		// Report the command exactly as executed, escaped for XML.
		$xmlResponse .= "<command>" . htmlspecialchars($command, ENT_QUOTES, 'UTF-8') . "</command>";
	}
}

/**
 * Tell whether a URL starts with "http://" or "https://".
 *
 * The comparison is case-sensitive and looks only at the prefix.
 *
 * @param string $siteName URL to test.
 * @return bool true when the URL begins with one of the two schemes.
 */
function isSite($siteName)
{
	foreach (array("http://", "https://") as $prefix) {
		if (strncmp($siteName, $prefix, strlen($prefix)) == 0) {
			return true;
		}
	}

	return false;
}

/**
 * Build the title for a link from its text.
 *
 * @param string $linkpath URL of the link; currently not used.
 * @param string $linktext Text of the link.
 * @return string $linktext with HTML special characters escaped.
 */
function getFullTitle($linkpath, $linktext)
{
	// TODO: possibly derive the title/tags from the page at $linkpath instead.
	$escapedText = htmlspecialchars($linktext);

	return $escapedText;
}
 
/**
 * Split a comma-separated string into its items.
 *
 * Only the string as a whole is trimmed; whitespace around individual items
 * is kept, and empty items are not removed (an empty string yields an array
 * holding one empty string).
 *
 * @param string $str Comma-separated list.
 * @return array Items in their original order.
 */
function csv2array($str)
{
    return explode(",", trim($str));
}
?>
